#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Last Update:

'''Small helpers for stripping HTML markup from text and for reading and
writing whole files.
'''

__revision__ = '0.1'
__author__ = 'lxd'
import re
#import urllib2
#import socket

#def web(url):
    #socket.setdefaulttimeout(3)
    #opener = urllib2.build_opener()
    #opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; zh-CN; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15')]
    #return opener.open(url)

def clean_html(data):
    """Strip HTML markup from ``data`` and return the remaining text.

    Processing order:

    1. every ``</p>`` is turned into a newline so paragraphs stay separated;
    2. named character entities such as ``&lt;`` are dropped (not decoded);
       numeric entities such as ``&#160;`` are left untouched;
    3. complete ``<...>`` tags are removed;
    4. leftover tag fragments (``/>``, ``<``, ``>``) are removed.

    Parentheses and slashes that belong to ordinary text are preserved.

    >>> data = '<div>some</div>'
    >>> clean_html(data)
    'some'
    >>> clean_html('<b>see</b> (a/b)')
    'see (a/b)'
    """
    data = re.sub(r'</p>', '\n', data)  # keep paragraph breaks
    data = re.sub(r'&[a-zA-Z]{1,10};', '', data)  # drop named entities (&lt; ...)
    data = re.sub(r'<[^>]*>', '', data)  # drop complete tags
    # Drop only stray tag fragments. The previous character class
    # ``[(/>)<]`` also deleted every "(", ")" and "/" from normal text.
    data = re.sub(r'/>|[<>]', '', data)
    return data

def open_file(name):
    """Read the file at path ``name`` in text mode and return all of it."""
    with open(name, 'r') as source:
        contents = source.read()
    return contents

def save_file(name, content):
    """Write ``content`` to the file at path ``name``, replacing what was there.

    The file is opened in text write mode, so it is created if missing and
    truncated otherwise.
    """
    with open(name, 'w') as sink:
        sink.write(content)

if __name__ == '__main__':
    # No script behaviour: running this file directly does nothing.
    pass

